<!DOCTYPE html>
<html lang="zh">
<head>
  <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">
<meta name="generator" content="Hexo 7.3.0">
  <link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png">
  <link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png">
  <link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png">
  <link rel="mask-icon" href="/images/logo.svg" color="#222">

<link rel="stylesheet" href="/css/main.css">


<link rel="stylesheet" href="/lib/font-awesome/css/font-awesome.min.css">

<script id="hexo-configurations">
    // Reuse an existing global NexT object if one is already defined; otherwise start with an empty one.
    var NexT = window.NexT || {};
    // Site-wide settings inlined as a single object literal. Later inline scripts on this page
    // read from it (e.g. CONFIG.comments) and attach per-page data to it (CONFIG.page).
    // NOTE(review): "version" is presumably the theme version rather than the Hexo version — confirm.
    var CONFIG = {"hostname":"yiriso.fun","root":"/","scheme":"Pisces","version":"7.8.0","exturl":false,"sidebar":{"position":"left","display":"post","padding":18,"offset":12,"onmobile":false},"copycode":{"enable":false,"show_result":false,"style":null},"back2top":{"enable":true,"sidebar":false,"scrollpercent":true},"bookmark":{"enable":false,"color":"#222","save":"auto"},"fancybox":false,"mediumzoom":false,"lazyload":false,"pangu":false,"comments":{"style":"tabs","active":null,"storage":true,"lazyload":false,"nav":null},"algolia":{"hits":{"per_page":10},"labels":{"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}},"localsearch":{"enable":false,"trigger":"auto","top_n_per_article":1,"unescape":false,"preload":false},"motion":{"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},"path":"search.xml"};
  </script>

  <meta name="description" content="Pearson, Spearman, Kendall 是统计学上的三大重要相关系数，表示两个变量之间变化的趋势方向和趋势程度。下面对这三类系数做简单的介绍，同时给出我的推导方法。 三种公式均由如下一般公式推到得来，只是基于不同假设： \[ \rho &#x3D; \cfrac{\sum_{i&#x3D;1}^N (x_i-\bar{x}) (y_i-\bar{y})} {\sqrt{\sum_{i&#x3D;1}^N (x_i">
<meta property="og:type" content="article">
<meta property="og:title" content="Pearson, Spearman, Kendall 相关系数公式推导">
<meta property="og:url" content="http://yiriso.fun/2024/10/27/%E7%9B%B8%E5%85%B3%E7%B3%BB%E6%95%B0/index.html">
<meta property="og:site_name" content="Yiriso&#39;s blog">
<meta property="og:description" content="Pearson, Spearman, Kendall 是统计学上的三大重要相关系数，表示两个变量之间变化的趋势方向和趋势程度。下面对这三类系数做简单的介绍，同时给出我的推导方法。 三种公式均由如下一般公式推到得来，只是基于不同假设： \[ \rho &#x3D; \cfrac{\sum_{i&#x3D;1}^N (x_i-\bar{x}) (y_i-\bar{y})} {\sqrt{\sum_{i&#x3D;1}^N (x_i">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="http://yiriso.fun/images/spearman_exmple.png">
<meta property="og:image" content="http://yiriso.fun/images/kandall.png">
<meta property="article:published_time" content="2024-10-27T08:45:01.672Z">
<meta property="article:modified_time" content="2024-10-27T08:45:01.673Z">
<meta property="article:author" content="Yiriso">
<meta property="article:tag" content="数理统计">
<meta property="article:tag" content="机器学习">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="http://yiriso.fun/images/spearman_exmple.png">

<link rel="canonical" href="http://yiriso.fun/2024/10/27/%E7%9B%B8%E5%85%B3%E7%B3%BB%E6%95%B0/">


<script id="page-configurations">
  // Per-page settings attached to the shared CONFIG object.
  // Field meanings follow the Hexo page variables: https://hexo.io/docs/variables.html
  CONFIG.page = { sidebar: "", isHome: false, isPost: true, lang: 'zh' };
</script>

  <title>Pearson, Spearman, Kendall 相关系数公式推导 | Yiriso's blog</title>
  






  <noscript>
  <style>
  /* No-JavaScript fallback (these rules sit inside <noscript>): reset opacity and
     offsets on the .use-motion elements to their initial values.
     NOTE(review): presumably main.css hides/offsets these until the motion scripts
     animate them in — confirm against the theme stylesheet. */
  .use-motion .brand,
  .use-motion .menu-item,
  .sidebar-inner,
  .use-motion .post-block,
  .use-motion .pagination,
  .use-motion .comments,
  .use-motion .post-header,
  .use-motion .post-body,
  .use-motion .collection-header { opacity: initial; }

  /* Title and subtitle also have their vertical offset reset. */
  .use-motion .site-title,
  .use-motion .site-subtitle {
    opacity: initial;
    top: initial;
  }

  /* Logo decoration lines: reset the horizontal offsets. */
  .use-motion .logo-line-before i { left: initial; }
  .use-motion .logo-line-after i { right: initial; }
  </style>
</noscript>


<style>
/* Inline emoji box: fixed 1.2em square whose own text is transparent. */
.github-emoji {
  position: relative;
  display: inline-block;
  width: 1.2em;
  min-height: 1.2em;
  overflow: hidden;
  vertical-align: top;
  color: transparent;
}

.github-emoji > span {
  position: relative;
  z-index: 10;
}

/* Strip decoration from the emoji image and any fancybox wrapper. */
.github-emoji img,
.github-emoji .fancybox {
  margin: 0 !important;
  padding: 0 !important;
  border: none !important;
  outline: none !important;
  text-decoration: none !important;
  user-select: none !important;
  cursor: auto !important;
}

/* Centre the image inside the box. */
.github-emoji img {
  height: 1.2em !important;
  width: 1.2em !important;
  position: absolute !important;
  left: 50% !important;
  top: 50% !important;
  transform: translate(-50%, -50%) !important;
  user-select: none !important;
  cursor: auto !important;
}

/* Fallback state: text becomes visible and the image is made fully transparent. */
.github-emoji-fallback {
  color: inherit;
}

.github-emoji-fallback img {
  opacity: 0 !important;
}
</style>
<link rel="alternate" href="/atom.xml" title="Yiriso's blog" type="application/atom+xml">
</head>

<body itemscope itemtype="http://schema.org/WebPage">
  <div class="container use-motion">
    <div class="headband"></div>

    <header class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-container">
  <div class="site-nav-toggle">
    <div class="toggle" aria-label="Toggle navigation bar">
      <span class="toggle-line toggle-line-first"></span>
      <span class="toggle-line toggle-line-middle"></span>
      <span class="toggle-line toggle-line-last"></span>
    </div>
  </div>

  <div class="site-meta">

    <a href="/" class="brand" rel="start">
      <span class="logo-line-before"><i></i></span>
      <h1 class="site-title">Yiriso's blog</h1>
      <span class="logo-line-after"><i></i></span>
    </a>
  </div>

  <div class="site-nav-right">
    <div class="toggle popup-trigger">
    </div>
  </div>
</div>




<nav class="site-nav">
  <ul id="menu" class="menu">
        <li class="menu-item menu-item-home">

    <a href="/" rel="section"><i class="fa fa-fw fa-home"></i>Home</a>

  </li>
        <li class="menu-item menu-item-archives">

    <a href="/archives/" rel="section"><i class="fa fa-fw fa-archive"></i>Archives</a>

  </li>
  </ul>
</nav>




</div>
    </header>

    
  <div class="back-to-top">
    <i class="fa fa-arrow-up"></i>
    <span>0%</span>
  </div>


    <main class="main">
      <div class="main-inner">
        <div class="content-wrap">
          

          <div class="content post posts-expand">
            

    
  
  
  <article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh">
    <link itemprop="mainEntityOfPage" href="http://yiriso.fun/2024/10/27/%E7%9B%B8%E5%85%B3%E7%B3%BB%E6%95%B0/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="image" content="/images/avatar.gif">
      <meta itemprop="name" content="Yiriso">
      <meta itemprop="description" content="以生物信息学习分享为主">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="Yiriso's blog">
    </span>
      <header class="post-header">
        <h1 class="post-title" itemprop="name headline">
          Pearson, Spearman, Kendall 相关系数公式推导
        </h1>

        <div class="post-meta">
            <span class="post-meta-item">
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              <span class="post-meta-item-text">Posted on</span>

              <time title="Created: 2024-10-27 08:45:01" itemprop="dateCreated datePublished" datetime="2024-10-27T08:45:01+00:00">2024-10-27</time>
            </span>

          

        </div>
      </header>

    
    
    
    <div class="post-body" itemprop="articleBody">

      
        <p id="相关系数">Pearson, Spearman, Kendall
是统计学上的三大重要相关系数，表示两个变量之间变化的趋势方向和趋势程度。下面对这三类系数做简单的介绍，同时给出我的推导方法。
三种公式均由如下一般公式推导得来，只是基于不同假设： <span class="math display">\[
\rho = \cfrac{\sum_{i=1}^N (x_i-\bar{x}) (y_i-\bar{y})}
{\sqrt{\sum_{i=1}^N (x_i-\bar{x})^2 \sum_{i=1}^N(y_i-\bar{y})^2}}
\]</span></p>
<h2 id="pearson-皮尔逊相关系数连续变量">Pearson
皮尔逊相关系数（连续变量）</h2>
<p>假设条件： 1. 服从正态分布，一般通过t检验 2. 标准差不为0 <span class="math display">\[
\rho_{X,Y}=\cfrac{\text{cov}(X,Y)}{\sigma_{X}\sigma_{Y}}
\]</span></p>
<p>线性相关系数，-1 到 1 ，
是对两者间<strong>线性关系</strong>的判别，较为严格。</p>
<h2 id="spearman-斯皮尔曼相关系数秩和相关系数-连续变量">Spearman
斯皮尔曼相关系数/秩和相关系数 （连续变量）</h2>
<p>秩（rank）用于表示数据之间的排序关系，对于大小相同的值采用<strong>平均秩（加粗）</strong>。
Spearman 本质是基于<strong>秩差</strong>的相关性分析 例子：<a target="_blank" rel="noopener" href="https://zhuanlan.zhihu.com/p/170354683">知乎专栏：示例来源</a> <img src="/images/spearman_exmple.png" alt="Spearman 秩相关系数示例图"></p>
<table>
<thead>
<tr class="header">
<th style="text-align: center;">x</th>
<th style="text-align: center;">R(x)</th>
<th style="text-align: center;">y</th>
<th style="text-align: center;">R(y)</th>
<th style="text-align: center;">|d|</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td style="text-align: center;">10.2</td>
<td style="text-align: center;"><strong>5.5</strong></td>
<td style="text-align: center;">9.5</td>
<td style="text-align: center;">3</td>
<td style="text-align: center;">2.5</td>
</tr>
<tr class="even">
<td style="text-align: center;">9.6</td>
<td style="text-align: center;">2</td>
<td style="text-align: center;">9.8</td>
<td style="text-align: center;"><strong>4.5</strong></td>
<td style="text-align: center;">2.5</td>
</tr>
<tr class="odd">
<td style="text-align: center;">9.2</td>
<td style="text-align: center;">1</td>
<td style="text-align: center;">8.8</td>
<td style="text-align: center;">1</td>
<td style="text-align: center;">0</td>
</tr>
<tr class="even">
<td style="text-align: center;">10.6</td>
<td style="text-align: center;"><strong>8</strong></td>
<td style="text-align: center;">10.1</td>
<td style="text-align: center;">7</td>
<td style="text-align: center;">1</td>
</tr>
<tr class="odd">
<td style="text-align: center;">9.9</td>
<td style="text-align: center;">3</td>
<td style="text-align: center;">10.3</td>
<td style="text-align: center;">9</td>
<td style="text-align: center;">6</td>
</tr>
<tr class="even">
<td style="text-align: center;">10.2</td>
<td style="text-align: center;"><strong>5.5</strong></td>
<td style="text-align: center;">9.3</td>
<td style="text-align: center;">2</td>
<td style="text-align: center;">3.5</td>
</tr>
<tr class="odd">
<td style="text-align: center;">10.6</td>
<td style="text-align: center;"><strong>8</strong></td>
<td style="text-align: center;">10.5</td>
<td style="text-align: center;">10</td>
<td style="text-align: center;">2</td>
</tr>
<tr class="even">
<td style="text-align: center;">10.0</td>
<td style="text-align: center;">4</td>
<td style="text-align: center;">10.0</td>
<td style="text-align: center;">6</td>
<td style="text-align: center;">2</td>
</tr>
<tr class="odd">
<td style="text-align: center;">11.2</td>
<td style="text-align: center;">11</td>
<td style="text-align: center;">10.6</td>
<td style="text-align: center;">11</td>
<td style="text-align: center;">0</td>
</tr>
<tr class="even">
<td style="text-align: center;">10.7</td>
<td style="text-align: center;">10</td>
<td style="text-align: center;">10.2</td>
<td style="text-align: center;">8</td>
<td style="text-align: center;">2</td>
</tr>
<tr class="odd">
<td style="text-align: center;">10.6</td>
<td style="text-align: center;"><strong>8</strong></td>
<td style="text-align: center;">9.8</td>
<td style="text-align: center;"><strong>4.5</strong></td>
<td style="text-align: center;">3.5</td>
</tr>
</tbody>
</table>
<p><span class="math display">\[
\rho_{spearman} = 1 - \cfrac{6\sum d_i^2}{n(n^2-1)}
\]</span></p>
<p>以上例子中 <span class="math inline">\(\rho = 0.6091\)</span>
排序相关系数，-1 到 1 ，
是对两者间<strong>单调关系</strong>的判别，较宽松。</p>
<h3 id="数学推导">数学推导</h3>
<p>对于相关系数均可以由 <a href="#相关系数">一般公式</a>
推导得，没怎么找到合适的表述，就自己简单的在这推一下。就结论而言，在不考虑平均秩的情况下两者等价。
1. 不考虑平均秩，对于秩有以下特点： <span class="math display">\[
   \bar{x} = \bar{y} = \cfrac{n+1}{2}\\
   \sum_{i=1}^n x_i = \sum_{i=1}^N y_i = \sum_{i=1}^n i =
\cfrac{n(n+1)}{2} \\
   \sum_{i=1}^n x_i^2 = \sum_{i=1}^n y_i^2  
   =  \sum_{i=1}^n i^2 = \cfrac{n(n+1)(2n+1)}{6}
   \]</span> 2. 则对于 <span class="math inline">\(\rho\)</span> 可得：
<span class="math display">\[
   \begin{aligned}
       \rho
       &amp;= \cfrac{\sum_{i=1}^N (x_i-\cfrac{n+1}{2})
(y_i-\cfrac{n+1}{2})}{\sum_{i=1}^N (i-\cfrac{n+1}{2})^2}\\
       &amp;= \cfrac{\sum_{i=1}^N \left[x_iy_i -
\cfrac{n+1}{2}(x_i+y_i)+\cfrac{(n+1)^2}{4}\right]}{\sum_{i=1}^N \left[i^2
-(n+1)i+\cfrac{(n+1)^2}{4}\right]}\\
       &amp;= \cfrac{\cfrac{n(n+1)^2}{4}-(n+1)\sum_{i=1}^N
i+\sum_{i=1}^N x_iy_i}
       {\cfrac{n(n+1)^2}{4}-(n+1)\sum_{i=1}^N i+\sum_{i=1}^N i^2}
   \end{aligned}
   \]</span> 3. 考虑到： <span class="math display">\[
   \sum_{i=1}^N (x_i-y_i)^2 = \sum_{i=1}^N x_i^2- 2x_iy_i +y_i^2 =
2\sum_{i=1}^N i^2 -2\sum_{i=1}^N x_iy_i
   \]</span> 4. 先算一下分母 <span class="math display">\[
   \begin{aligned}
       Z
       &amp;= \cfrac{n(n+1)^2}{4}-(n+1)\sum_{i=1}^N i+\sum_{i=1}^N i^2
\\
       &amp;= \cfrac{n(n+1)^2}{4}-\cfrac{n(n+1)^2}{2}
+\cfrac{n(n+1)(2n+1)}{6} \\
       &amp;= \cfrac{n(n+1)(n-1)}{12}
   \end{aligned}
   \]</span> 5. 综上 <span class="math display">\[
   \begin{aligned}
    \rho
    &amp;=\cfrac{\cfrac{n(n+1)^2}{4}-(n+1)\sum_{i=1}^N i+ \sum_{i=1}^N
i^2 -\cfrac{1}{2}\sum_{i=1}^N
(x_i-y_i)^2}{\cfrac{n(n+1)^2}{4}-(n+1)\sum_{i=1}^N i+\sum_{i=1}^N
i^2}\\
    &amp;= \cfrac{Z-\cfrac{1}{2}\sum_{i=1}^N (x_i-y_i)^2}{Z}\\
    &amp;= 1-\cfrac{6\sum_{i=1}^N d_i^2}{n(n^2-1)}
   \end{aligned}
   \]</span></p>
<p>考虑平均秩的情况，会导致 <span class="math inline">\(\sum_{i=1}^n
x_i^2 \neq \sum_{i=1}^n y_i^2 \neq \sum_{i=1}^n i^2\)</span> 此时有：
<span class="math display">\[
\begin{aligned}
    \sum_{i=1}^N (x_i-y_i)^2 &amp;= \sum_{i=1}^N (x_i^2+y_i^2) -
2\sum_{i=1}^nx_iy_i \\
    \sum_{i=1}^N (x_i-\bar{x})^2 &amp;=
-\cfrac{n(n+1)^2}{4}+\sum_{i=1}^N x_i^2
\end{aligned}
\]</span></p>
<p>所以 <span class="math display">\[
\rho = \cfrac
{-\cfrac{n(n+1)^2}{4}+\cfrac{1}{2}\sum_{i=1}^N
(x_i^2+y_i^2)-\cfrac{1}{2}\sum_{i=1}^N (x_i-y_i)^2}
{\sqrt{\left(-\cfrac{n(n+1)^2}{4}+\sum_{i=1}^N x_i^2
\right)\left(-\cfrac{n(n+1)^2}{4}+\sum_{i=1}^N y_i^2 \right)}}
\]</span></p>
<p>考虑存在平均秩的编秩 <span class="math inline">\(t\)</span>： <span class="math display">\[
\begin{aligned}
    \sum_{i=1}^{n}t_i^2 &amp;= \sum_{非平均秩项}t_i^2 +
\sum_{m=1}^{M}\sum_{j=1}^{N_m}\bar{t_m}^2 \;\Leftarrow
\small{平均秩项}\\
    &amp;=\sum_{非平均秩项}t_i^2 + \sum_{m=1}^{M}N_m\bar{t_m}^2\\
    \small{均值不等式A_n \leqslant Q_n}\Rightarrow &amp;\leqslant
\sum_{i=1}^n i^2
\end{aligned}
\]</span></p>
<p>记：<span class="math inline">\(\Delta_x = \sum_{i=1}^n i^2 -
\sum_{i=1}^n x_i^2 &gt;0,\;\Delta_y = \sum_{i=1}^n i^2 - \sum_{i=1}^n
y_i^2&gt;0\)</span> <span class="math display">\[
\begin{aligned}
    \rho(\Delta_x,\Delta_y) &amp;= \cfrac{Z-\cfrac{1}{2}\sum_{i=1}^N
d_i^2 -\cfrac{\Delta_x+\Delta_y}{2}}{\sqrt{(Z-\Delta_x)(Z-\Delta_y)}}\\
    &amp;= \cfrac{A-\Delta_x-\Delta_y}{2\sqrt{(Z-\Delta_x)(Z-\Delta_y)}}
&amp;A=2Z-\sum_{i=1}^N d_i^2\\
    \rho(0,0) &amp;= \rho_{spearman}
\end{aligned}
\]</span></p>
<p>易得 <span class="math inline">\(\Delta_x,\Delta_y\)</span> 远小于
<span class="math inline">\(Z\)</span> ,则偏导有如下关系： <span class="math display">\[
\begin{cases}
    \cfrac{\partial\rho}{\partial\Delta_x} =
    -\cfrac{\sqrt{Z-\Delta_y}\left(\sqrt{Z-\Delta_x}-\cfrac{\Delta_x}{2\sqrt{Z-\Delta_x}}\right)}{2(Z-\Delta_x)(Z-\Delta_y)}&lt;0\\
    \cfrac{\partial\rho}{\partial\Delta_y} =
    -\cfrac{\sqrt{Z-\Delta_x}\left(\sqrt{Z-\Delta_y}-\cfrac{\Delta_y}{2\sqrt{Z-\Delta_y}}\right)}{2(Z-\Delta_x)(Z-\Delta_y)}&lt;0\\
\end{cases}
\]</span></p>
<p>综上所述，有如下不等关系，当且仅当无平均秩项时取等。 <span class="math display">\[
\rho_{spearman} \geqslant\ \rho
\]</span></p>
<h2 id="kendall-肯德尔相关系数">Kendall 肯德尔相关系数</h2>
<p><a target="_blank" rel="noopener" href="https://blog.csdn.net/chenxy_bwave/article/details/126919019">参考博文（CSDN）</a></p>
<p>参考文献：Maurice G. Kendall, “The treatment of ties in ranking
problems”, Biometrika Vol. 33, No. 3, pp. 239-251. 1945.</p>
<p>用于有序分类的数据（等级评分），为一种秩相关系数。
基于样本<strong>数据对</strong>之间的关系来进行相关系数的强弱的分析，数据对可以分为<strong>一致对(Concordant)</strong>和<strong>分歧对(Discordant)</strong>。</p>
<ol type="1">
<li>Tau-a <span class="math display">\[
\tau_a = 2\cfrac{c-d'}{n(n-1)}
\]</span></li>
</ol>
<p><span class="math inline">\(c\)</span> 表示一致的对数 <span class="math inline">\((1,1) \Leftrightarrow(2,2)\)</span> <span class="math inline">\(d'\)</span> 表示非一致的对数 （<span class="math inline">\(d'+c = \cfrac{n(n-1)}{2}\)</span>）</p>
<ol start="2" type="1">
<li>Tau-b 处理相同排位 <span class="math display">\[
\tau_b = \cfrac{c-d}{\sqrt{(c+d+t_x)(c+d+t_y)}}
\]</span></li>
</ol>
<p><span class="math inline">\(c\)</span> 表示一致的对数 <span class="math inline">\((1,1) \Leftrightarrow(2,2)\)</span> <span class="math inline">\(d\)</span> 表示相反的对数 <span class="math inline">\((1,2) \Leftrightarrow(2,1)\)</span> <span class="math inline">\(t_x\)</span> 表示 <span class="math inline">\(x\)</span> 分量不同的对数 <span class="math inline">\((1,2) \Leftrightarrow(2,2)\;\text{OR}\;(2,2)
\Leftrightarrow(1,2)\)</span> <span class="math inline">\(t_y\)</span>
表示 <span class="math inline">\(y\)</span> 分量不同的对数 <span class="math inline">\((1,1) \Leftrightarrow(1,2)\;\text{OR}\;(1,2)
\Leftrightarrow(1,1)\)</span> <img src="/images/kandall.png" alt="Kendall 相关系数数据对分类示意图"></p>
<ol start="3" type="1">
<li>Tau-c 为 Tau-b 的简化版 <span class="math display">\[
\tau_c = \cfrac{2m(c - d)}{ n^2 (m - 1)}
\]</span> <span class="math inline">\(n\)</span> 表示总样品数目 <span class="math inline">\(m\)</span> 表示 x，y 分类下较小的分类数</li>
</ol>
<p>这里举一例说明，列出如下“棋盘”：</p>
<table>
<thead>
<tr class="header">
<th></th>
<th>(1,1)</th>
<th>(1,2)</th>
<th>(1,2)</th>
<th>(2,3)</th>
<th>(3,3)</th>
<th>(3,2)</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>(1,1)</td>
<td>-</td>
<td>y</td>
<td>y</td>
<td>xy</td>
<td>xy</td>
<td>xy</td>
</tr>
<tr class="even">
<td>(1,2)</td>
<td>-</td>
<td>-</td>
<td>0</td>
<td>xy</td>
<td>xy</td>
<td>x</td>
</tr>
<tr class="odd">
<td>(1,2)</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td>xy</td>
<td>xy</td>
<td>x</td>
</tr>
<tr class="even">
<td>(2,3)</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td>x</td>
<td>-xy</td>
</tr>
<tr class="odd">
<td>(3,3)</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td>y</td>
</tr>
<tr class="even">
<td>(3,2)</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td>-</td>
</tr>
</tbody>
</table>
<p>x表示x分量不同，y表示y分量不同，xy表示一致的对，-xy表示相反的对。</p>
<p>则有：<span class="math inline">\(t_x=3,t_y=3,c=7,d=1\)</span>
得到：<span class="math inline">\(\tau_a =\cfrac{7-8}{15}=-\cfrac{1}{15}
\;,\;\tau_b=\cfrac{6}{11}\;,\;\tau_c=\cfrac{1}{2}\)</span></p>
<p>对于有序分类问题一般采用 <span class="math inline">\(\tau_b\)</span>
作为相关系数</p>
<h3 id="数学推导-1">数学推导</h3>
<p>同样没有找到合适数学表述（参考文献倒是给推导但是太过繁琐），这里我就简单推导一下。
<strong>基本思路</strong>：<a href="#相关系数">一般公式</a> <span class="math inline">\(\rightarrow\)</span> tau-b <span class="math inline">\(\rightarrow\)</span> tau-c <span class="math inline">\(\rightarrow\)</span> tau-a</p>
<ol type="1">
<li><p>使用符号函数计数： <span class="math display">\[
\begin{aligned}
c-d &amp;=\cfrac{1}{2}\sum_{i,j}\text{sgn}(x_i-x_j)\text{sgn}(y_i-y_j)\\
c+d+t_x &amp;= \cfrac{1}{2}\sum_{i,j}\text{sgn}^2(x_i-x_j)\\
c+d+t_y &amp;= \cfrac{1}{2}\sum_{i,j}\text{sgn}^2(y_i-y_j)
\end{aligned}
\]</span></p>
<p>这里，将 <span class="math inline">\(\sum_{i=1}^n\sum_{j=1}^n\)</span> 简写为 <span class="math inline">\(\sum_{i,j}\)</span>，不难发现：当 <span class="math inline">\(i=j\)</span> 时有 <span class="math inline">\(\text{sgn}(x_i-x_j)=\text{sgn}(y_i-y_j)=0\)</span>
这就解释了系数 0.5 ，这里结合 <strong>”棋盘“</strong>
会更好理解就不过多阐述。</p></li>
<li><p><a href="#相关系数">一般公式</a> <span class="math inline">\(\rightarrow\)</span> tau-b 需要提前声明的是 <span class="math inline">\(\tau_b\)</span>
并不是一般公式的严格导出，但采取了相同的思想。
Kendall系数作为使用符号计数方式改造了一般公式，以下公式可以给出直观理解：
<span class="math display">\[
\begin{aligned}
\rho &amp;= \cfrac{\sum_{i=1}^N (x_i-\bar{x})
(y_i-\bar{y})}{\sqrt{\sum_{i=1}^N (x_i-\bar{x})^2
\sum_{i=1}^N(y_i-\bar{y})^2}}\\
\tau_b &amp;= \cfrac{c-d}{\sqrt{(c+d+t_x)(c+d+t_y)}} \\
&amp;=\cfrac{\sum_{i,j}\text{sgn}(x_i-x_j)\text{sgn}(y_i-y_j)}
{\sqrt{\sum_{i,j}\text{sgn}^2(x_i-x_j)\sum_{i,j}\text{sgn}^2(y_i-y_j)}}
\end{aligned}
\]</span></p>
<p>论文中（10）式也给出 <span class="math inline">\(\tau\)</span>
的定义。这里记 <span class="math inline">\(a_{ij}=\text{sgn}(x_i-x_j),b_{ij} =
\text{sgn}(y_i-y_j)\)</span>
论文中并没有采用符号函数的记法，但意思是一样的。 <span class="math display">\[
\tau = \cfrac{\sum a_{ij}b_{ij}}{\sqrt{\sum a_{ij}^2\sum b_{ij}^2}}
\]</span></p></li>
<li><p>tau-b <span class="math inline">\(\rightarrow\)</span> tau-c <a target="_blank" rel="noopener" href="https://blog.csdn.net/ChenVast/article/details/83023053">参考博文（CSDN）</a>
tau-c 比 tau-b
更适合于基于<strong>非正方形列联表</strong>的数据分析。例如，一个变量可能在5分制（非常好，好，平均，差，非常差）上得分，而另一个可能基于更精细的10分制。</p>
<p>tau-b 与 tau-c 的差别主要体现在标准化到 [-1,1] 的方式不同，tau-b
借用相关系数的思想通过方差实现标准化，而 tau-c 则基于计数的方式使用
<span class="math inline">\(\max\)</span> 与 <span class="math inline">\(\min\)</span>
的思想实现标准化。（这里和论文中的方法不同，论文中采取value值的解释我也不是很理解）</p>
<p>以下是我的推导方法：</p>
<ol type="1">
<li><p>基本思路 首先我们将注意力集中到 <span class="math inline">\(\sum
a_{ij}^2\)</span>
上很明显这是个耗时的操作，同时，在有序分类中会出现大量的重复计算，这里我们的目标其实就是使用一种更简单方式得到基本等效的值。考虑到
<span class="math inline">\(\tau \in [-1,1]\)</span>
我们就可以从其上下界入手，<span class="math inline">\(\max,\min \sum
a_{ij}b_{ij}\)</span></p></li>
<li><p>矩阵方法
为了方便下一步的讨论，我们将一些需要的数据化成矩阵方便讨论。 <span class="math display">\[
A =
\begin{bmatrix}
a_{11}^2 &amp; \cdots &amp; a_{1n}^2  \\
\vdots &amp; \ddots &amp; \vdots \\
a_{n1}^2  &amp; \cdots &amp; a_{nn}^2
\end{bmatrix}
\;,\;
B =
\begin{bmatrix}
b_{11}^2 &amp; \cdots &amp; b_{1n}^2  \\
\vdots &amp; \ddots &amp; \vdots \\
b_{n1}^2  &amp; \cdots &amp; b_{nn}^2
\end{bmatrix}
\;,\;
C =
\begin{bmatrix}
a_{11}b_{11} &amp; \cdots &amp; a_{1n}b_{1n}  \\
\vdots &amp; \ddots &amp; \vdots \\
a_{n1}b_{n1}  &amp; \cdots &amp; a_{nn}b_{nn}
\end{bmatrix}
\]</span></p>
<p>不难发现：<span class="math inline">\(\text{rank}(C) = \min
\{\text{rank}(A),\text{rank}(B)\}\)</span></p></li>
<li><p>矩阵的秩——分类数——序列的秩
以上的关系中我已经隐约可以发现三者之间的关系，这里举个例子：对于分类序列
<span class="math inline">\(v = (1,1,3,3,5,5)\)</span> ，分类数 <span class="math inline">\(m=3\)</span> ,仅考虑大小关系的编秩得 <span class="math inline">\(R(v) = (1,1,2,2,3,3)\)</span>
，同时参考以上对矩阵的定义不难得到: <span class="math display">\[
V =
\begin{bmatrix}
0&amp;0&amp;1&amp;1&amp;1&amp;1 \\
0&amp;0&amp;1&amp;1&amp;1&amp;1 \\
1&amp;1&amp;0&amp;0&amp;1&amp;1 \\
1&amp;1&amp;0&amp;0&amp;1&amp;1 \\
1&amp;1&amp;1&amp;1&amp;0&amp;0 \\
1&amp;1&amp;1&amp;1&amp;0&amp;0 \\
\end{bmatrix}\\
\text{rank}(V) = 3
\]</span></p>
<p>此时我们就有了：<span class="math inline">\(\text{rank}(X) =
m\)</span></p></li>
<li><p>最大化目标 对于 <span class="math inline">\(C\)</span>
中的任意一项必须满足 <span class="math inline">\(A\)</span> 和 <span class="math inline">\(B\)</span> 中的对应一项不同时为零，同时假设 <span class="math inline">\(\text{rank}(A)&lt;\text{rank}(B)\)</span> 易得:
<span class="math display">\[
\lvert \sum a_{ij}b_{ij} \rvert &lt; \sum a_{ij}^2 &lt; \sum b_{ij}^2
\]</span> 那么我们就将 <span class="math inline">\(\tau\)</span>
简化为如下的形式： <span class="math display">\[
\tau_c = \cfrac{2(c-d)}{\max \sum a_{ij}^2}
\]</span></p></li>
<li><p>上界确定 这里让我们先从上面的例子出发来寻找一下 <span class="math inline">\(\max \sum a_{ij}^2\)</span>，即为矩阵中 <span class="math inline">\(1\)</span> 的数目。 <span class="math display">\[
V = \begin{bmatrix}
O&amp;I&amp;I \\
I&amp;O&amp;I \\
I&amp;I&amp;O \\
\end{bmatrix}
\]</span></p>
<p>这里记 <span class="math inline">\(O\)</span> 为零方阵 <span class="math inline">\(I\)</span> 为元素全为 <span class="math inline">\(1\)</span> 的方阵，不难发现我们只要要求 <span class="math inline">\(O\)</span>
所含的元素最少，稍作扩展到一般情况就有如下规划模型： <span class="math display">\[
\min \sum_{i=1}^m k_i^2\\
\text{s.t.} \sum_{i=1}^m k_i = n
\]</span></p>
<p>其中 <span class="math inline">\(k_i\)</span> 表示第 <span class="math inline">\(i\)</span>
类的样本量。可以使用拉格朗日乘数法求解，其实由对称性就可以给出解： <span class="math display">\[
\text{when}\;k_1=k_2=\dots = k_m = \cfrac{n}{m}\\
\min \sum_{i=1}^m k_i^2 = m \cdot \cfrac{n^2}{m^2} = \cfrac{n^2}{m}
\]</span></p></li>
<li><p>整理 此时已经解出上界： <span class="math display">\[
\max \sum a_{ij}^2 = n^2-\cfrac{n^2}{m} = \cfrac{n^2(m-1)}{m}
\]</span> 代入得： <span class="math display">\[
\tau_c = \cfrac{2m(c - d)}{ n^2 (m - 1)}
\]</span></p></li>
</ol></li>
<li><p>tau-c <span class="math inline">\(\rightarrow\)</span> tau-a
这一步的简化要求最小分类数 <span class="math inline">\(m \approx
n\)</span> ，其实是<strong>丧失了有序分类的特征</strong>。 同时在 <span class="math inline">\(m \approx n\)</span>
的条件下，由于分类过细导致相对较少的出现相同项或是具有相同分量的项，此时就有
<span class="math inline">\(d'\approx d\)</span>
，可以将tau-c进一步简化： <span class="math display">\[
\tau_a = \cfrac{2(c-d')}{n(n-1)} \approx \cfrac{2m(c - d)}{ n^2 (m -
1)} = \tau_c
\]</span></p>
<p>在我的理解中，这里还可以朴素的理解为数据对一致与分歧频率差： <span class="math display">\[
N =C_n^2 =\cfrac{A_n^2}{A_2^2}= \cfrac{n(n-1)}{2} \\
v_c = \cfrac{N_c}{N} = \cfrac{2c}{n(n-1)}\\
v_d = \cfrac{N_{d'}}{N} = \cfrac{2d'}{n(n-1)}\\
\tau_a = v_c-v_d = \cfrac{2(c-d')}{n(n-1)}
\]</span></p></li>
</ol>

    </div>

    
    
    

      <footer class="post-footer">
          <div class="post-tags">
              <a href="/tags/%E6%95%B0%E7%90%86%E7%BB%9F%E8%AE%A1/" rel="tag"># 数理统计</a>
              <a href="/tags/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/" rel="tag"># 机器学习</a>
          </div>

        


        
    <div class="post-nav">
      <div class="post-nav-item">
    <a href="/2024/10/26/%E6%95%B0%E9%A1%B9%E7%BA%A7%E6%95%B0%E5%AE%A1%E6%95%9B%E9%97%AE%E9%A2%98/" rel="prev" title="数项级数审敛问题">
      <i class="fa fa-chevron-left"></i> 数项级数审敛问题
    </a></div>
      <div class="post-nav-item"></div>
    </div>
      </footer>
    
  </article>
  
  
  



          </div>
          

<script>
  // Comment-tab memory: when the tabs are registered, re-open the comment tab the
  // reader used last; afterwards record every tab switch.
  // All storage access is best-effort: touching localStorage throws when the browser
  // blocks site storage, and that must not abort the handlers.
  window.addEventListener('tabs:register', () => {
    // NOTE(review): the CONFIG.comments literal inlined in this page defines `active`
    // but no `activeClass`, so this default is undefined here — confirm whether the
    // theme is expected to inject `activeClass`.
    let { activeClass } = CONFIG.comments;
    if (CONFIG.comments.storage) {
      try {
        activeClass = localStorage.getItem('comments_active') || activeClass;
      } catch (err) {
        // Storage unavailable: keep the configured default.
      }
    }
    if (!activeClass) return;
    const activeTab = document.querySelector(`a[href="#comment-${activeClass}"]`);
    if (activeTab) {
      activeTab.click();
    }
  });
  if (CONFIG.comments.storage) {
    window.addEventListener('tabs:click', event => {
      // Only react to clicks reported for a comment tab pane.
      if (!event.target.matches('.tabs-comment .tab-content .tab-pane')) return;
      // The pane's second class is what gets stored and later matched against
      // the `#comment-<class>` tab link.
      const commentClass = event.target.classList[1];
      try {
        localStorage.setItem('comments_active', commentClass);
      } catch (err) {
        // Storage unavailable or full: remembering the tab is optional.
      }
    });
  }
</script>

        </div>
          
  
  <div class="toggle sidebar-toggle">
    <span class="toggle-line toggle-line-first"></span>
    <span class="toggle-line toggle-line-middle"></span>
    <span class="toggle-line toggle-line-last"></span>
  </div>

  <aside class="sidebar">
    <div class="sidebar-inner">

      <ul class="sidebar-nav motion-element">
        <li class="sidebar-nav-toc">
          Table of Contents
        </li>
        <li class="sidebar-nav-overview">
          Overview
        </li>
      </ul>

      <!--noindex-->
      <div class="post-toc-wrap sidebar-panel">
          <div class="post-toc motion-element"><ol class="nav"><li class="nav-item nav-level-2"><a class="nav-link" href="#pearson-%E7%9A%AE%E5%B0%94%E9%80%8A%E7%9B%B8%E5%85%B3%E7%B3%BB%E6%95%B0%E8%BF%9E%E7%BB%AD%E5%8F%98%E9%87%8F"><span class="nav-number">1.</span> <span class="nav-text">Pearson
皮尔逊相关系数（连续变量）</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#spearman-%E6%96%AF%E7%9A%AE%E5%B0%94%E6%9B%BC%E7%9B%B8%E5%85%B3%E7%B3%BB%E6%95%B0%E7%A7%A9%E5%92%8C%E7%9B%B8%E5%85%B3%E7%B3%BB%E6%95%B0-%E8%BF%9E%E7%BB%AD%E5%8F%98%E9%87%8F"><span class="nav-number">2.</span> <span class="nav-text">Spearman
斯皮尔曼相关系数&#x2F;秩和相关系数 （连续变量）</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#%E6%95%B0%E5%AD%A6%E6%8E%A8%E5%AF%BC"><span class="nav-number">2.1.</span> <span class="nav-text">数学推导</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#kendall-%E8%82%AF%E5%BE%B7%E5%B0%94%E7%9B%B8%E5%85%B3%E7%B3%BB%E6%95%B0"><span class="nav-number">3.</span> <span class="nav-text">Kendall 肯德尔相关系数</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#%E6%95%B0%E5%AD%A6%E6%8E%A8%E5%AF%BC-1"><span class="nav-number">3.1.</span> <span class="nav-text">数学推导</span></a></li></ol></li></ol></div>
      </div>
      <!--/noindex-->

      <div class="site-overview-wrap sidebar-panel">
        <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
  <p class="site-author-name" itemprop="name">Yiriso</p>
  <div class="site-description" itemprop="description">以生物信息学习分享为主</div>
</div>
<div class="site-state-wrap motion-element">
  <nav class="site-state">
      <div class="site-state-item site-state-posts">
          <a href="/archives/">
        
          <span class="site-state-item-count">2</span>
          <span class="site-state-item-name">posts</span>
        </a>
      </div>
      <div class="site-state-item site-state-tags">
        <span class="site-state-item-count">3</span>
        <span class="site-state-item-name">tags</span>
      </div>
  </nav>
</div>



      </div>

    </div>
  </aside>
  <div id="sidebar-dimmer"></div>


      </div>
    </main>

    <footer class="footer">
      <div class="footer-inner">
        

        
  <div class="beian"><a href="http://www.beian.miit.gov.cn/" rel="noopener" target="_blank">苏ICP备2024073618号-1 </a>
  </div>

<div class="copyright">
  
  &copy; 
  <span itemprop="copyrightYear">2024</span>
  <span class="with-love">
    <i class="fa fa-user"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">Yiriso</span>
</div>
  <div class="powered-by">Powered by <a href="https://hexo.io/" class="theme-link" rel="noopener" target="_blank">Hexo</a> &amp; <a href="https://pisces.theme-next.org/" class="theme-link" rel="noopener" target="_blank">NexT.Pisces</a>
  </div>

        








      </div>
    </footer>
  </div>

  
  <script src="/lib/anime.min.js"></script>
  <script src="/lib/velocity/velocity.min.js"></script>
  <script src="/lib/velocity/velocity.ui.min.js"></script>

<script src="/js/utils.js"></script>

<script src="/js/motion.js"></script>


<script src="/js/schemes/pisces.js"></script>


<script src="/js/next-boot.js"></script>




  















  

  
      

<script>
  // MathJax bootstrap.
  // First load (no global MathJax yet): install the configuration object, then inject
  // the MathJax bundle. Otherwise (MathJax already loaded): reset its state and typeset
  // the current document again.
  if (typeof MathJax === 'undefined') {
    window.MathJax = {
      loader: {
        // Point the alternate spellings of the amscd extension at the lowercase path.
        source: {
          '[tex]/amsCd': '[tex]/amscd',
          '[tex]/AMScd': '[tex]/amscd'
        }
      },
      tex: {
        // '[+]' adds $...$ to the inline delimiters instead of replacing the defaults.
        inlineMath: {'[+]': [['$', '$']]},
        tags: 'ams'
      },
      options: {
        renderActions: {
          // Turn <script type="math/tex..."> nodes into MathItems: each script node is
          // replaced by an empty text node that marks where the math is rendered.
          findScript: [10, doc => {
            document.querySelectorAll('script[type^="math/tex"]').forEach(node => {
              // "; mode=display" in the type attribute selects display (block) math.
              const display = !!node.type.match(/; *mode=display/);
              const math = new doc.options.MathItem(node.textContent, doc.inputJax[0], display);
              const text = document.createTextNode('');
              node.parentNode.replaceChild(text, node);
              math.start = {node: text, delim: '', n: 0};
              math.end = {node: text, delim: '', n: 0};
              doc.math.push(math);
            });
          }, '', false],
          // After rendering, tag the list that contains a math-bearing <li> with "has-jax".
          insertedScript: [200, () => {
            document.querySelectorAll('mjx-container').forEach(node => {
              let target = node.parentNode;
              if (target.nodeName.toLowerCase() === 'li') {
                target.parentNode.classList.add('has-jax');
              }
            });
          }, '', false]
        }
      }
    };
    (function () {
      var script = document.createElement('script');
      // Explicit https: a protocol-relative URL would be fetched over plain HTTP when the
      // page itself is served over http://, and would not resolve at all from file://.
      script.src = 'https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js';
      script.defer = true;
      document.head.appendChild(script);
    })();
  } else {
    MathJax.startup.document.state(0);
    MathJax.texReset();
    MathJax.typeset();
  }
</script>

    

  

</body>
</html>
